# Set the working directory so the relative CSV and PDF paths used below
# resolve against the replication folder.
# NOTE(review): this is a machine-specific absolute path; adjust before running elsewhere.
setwd("~/Dropbox/PoliticalScienceTopics/replication")
# Clear the workspace: removes every object that ls() lists in the current
# environment (it does not touch the command history or attached packages).
rm(list=ls())

# Load libraries
# reshape2 provides melt(), used to reshape the wide tables to long format.
library(reshape2)
# Raise the penalty against scientific notation so small proportions are
# printed in fixed notation.
options("scipen" = 10)
# Echo the current scipen setting (only has a visible effect when auto-printed).
options()$scipen
# ggplot2 provides the plotting functions and ggsave().
library(ggplot2)
# dplyr provides rename() and filter(); it is attached last, so its exports
# take precedence over same-named functions from packages attached earlier.
library(dplyr)

###### Read data
# All three files are read relative to the current working directory.
#   main       - input for Figures 1 and 3 (and the journal series in Figure 2)
#   rob        - input for the appendix robustness figures (A.1)
#   ngram_data - input for Figure 2 (Google NGram data)
# TRUE/FALSE are spelled out instead of T/F: T and F are ordinary variables
# that can be reassigned, whereas TRUE and FALSE are reserved words.

main <- read.csv("csv_main.csv", stringsAsFactors = FALSE)
rob <- read.csv("csv_robust.csv", stringsAsFactors = FALSE)
ngram_data <- read.csv("ngram_fiction.csv", stringsAsFactors = FALSE,
                       sep = ",", header = TRUE)

############ PLOT Figure 1 left panel
## Number of abstracts with at least one positive/negative word divided by
## number of abstracts.

# 1. Melt data for ggplot: one row per (year, type), where `type` is either
#    "rel_pos_abs" or "rel_neg_abs" and `type_value` holds that column's value.
names(main)
all_abstract_long <- melt(
  main[, c("year", "rel_pos_abs", "rel_neg_abs")],
  id.vars = "year",
  variable.name = "type",
  value.name = "type_value"
)

# 2. Draw one smoothed black line per type, distinguished by linetype only.
#    The linetype mapping is declared once in ggplot() and inherited by the
#    layer, and the x-axis title is set once via xlab().
#    The lines are labelled with text annotations, so the legend is hidden.
rel.both_smooth <- ggplot(
  all_abstract_long,
  aes(x = year, y = type_value, group = type, linetype = type)
) +
  geom_smooth(color = "black", span = 0.5, se = FALSE) +
  annotate("text", x = 2003, y = 0.15, label = "Positive words") +
  annotate("text", x = 2003, y = 0.05, label = "Negative words") +
  scale_x_continuous(breaks = c(1990, 2000, 2010),
                     labels = c(1990, 2000, 2010)) +
  theme_bw() +
  ylab("Proportion of abstracts") +
  xlab("Year") +
  theme(legend.position = "none")

rel.both_smooth

# 3. Save to PDF. `filename` is spelled out in full rather than relying on
#    partial matching of `file`.
ggsave(filename = "figure1left_relative.pdf", plot = rel.both_smooth,
       dpi = 600, width = 10, height = 7)


### Figure 1 right panel
# Plot the share of all words that are positive/negative.

# Reshape for ggplot: one row per (year, type), where `type` is either
# "sharepositive" or "sharenegative" and `sharewords` holds the value.
names(main)
wordcount_long <- melt(
  main[, c("year", "sharepositive", "sharenegative")],
  id.vars = "year",
  variable.name = "type",
  value.name = "sharewords"
)

# One smoothed black line per type, distinguished by linetype only; the
# linetype mapping is declared once in ggplot() and inherited by the layer.
# The y-axis uses fixed breaks, labels and limits.
sharewords_smooth <- ggplot(
  wordcount_long,
  aes(x = year, y = sharewords, group = type, linetype = type)
) +
  geom_smooth(color = "black", span = 0.5, se = FALSE) +
  annotate("text", x = 2001, y = 0.0013, label = "Positive words") +
  annotate("text", x = 2001, y = 0.0003, label = "Negative words") +
  scale_x_continuous(breaks = c(1990, 2000, 2010),
                     labels = c(1990, 2000, 2010)) +
  scale_y_continuous(
    breaks = c(0.0000, 0.0005, 0.0010, 0.0015, 0.0020),
    labels = c("0.0000", "0.0005", "0.0010", "0.0015", "0.0020"),
    limits = c(0.00005, 0.00207)
  ) +
  theme_bw() +
  ylab("Proportion of words") +
  xlab("Year") +
  theme(legend.position = "none")
sharewords_smooth

# Save to PDF. `filename` is spelled out in full rather than relying on
# partial matching of `file`.
ggsave(filename = "figure1right_share.pdf", plot = sharewords_smooth,
       dpi = 600, width = 10, height = 7)

############################## Figure 3
# Plot "at least one positive/negative word" for each word in abstract;
# overall occurrence > 5.

########## Reshape for plotting
# Column 1 is used as the id variable ("year") and columns 10:30 are the 21
# per-word columns; melt() stacks them in column order and stores the column
# name in the factor `word`.
names(main)
worddummy_year_eachword_long <- melt(
  main[, c(1, 10:30)],
  id.vars = "year",
  variable.name = "word",
  value.name = "word_value"
)

# Label every row as positive or negative from the position of its word
# column instead of from hard-coded row ranges. The previous ranges
# (rows 1:429 positive, 430:693 negative) only line up with the first 13 word
# columns when `main` has exactly 33 rows (693 melted rows / 21 word columns);
# with any other row count they would mislabel rows or leave `type` as NA.
# `word` is a factor whose levels follow the melted column order, so its
# integer code is the position of the word column.
n_positive_words <- 13L
worddummy_year_eachword_long$type <- ifelse(
  as.integer(worddummy_year_eachword_long$word) <= n_positive_words,
  "positive",
  "negative"
)

## Change labels for plot: map each positive word's column name to the
## facet title shown in the figure.
change <- as_labeller(c(
  'encouraging_dum'="encouraging",
  'enormous_dum'="enormous",
  'excellent_dum'="excellent",
  'favorable_dum'="favorable",
  'innovative_dum'="innovative",
  'novel_dum'="novel",
  'prominent_dum'="prominent",
  'promising_dum'="promising",
  'remarkable_dum'="remarkable",
  'robust_dum'="robust",
  'supportive_dum'="supportive",
  'unique_dum'="unique",
  'unprecedented_dum'="unprecedented"))

# One facet per positive word, each with its own free axes, showing a
# smoothed black line of word_value over year.
positive_words_dummy <- ggplot(
  worddummy_year_eachword_long[
    worddummy_year_eachword_long$type == "positive", ],
  aes(year, word_value)
) +
  facet_wrap(~word, scales = "free", labeller = change) +
  geom_smooth(span = 0.5, se = FALSE, color = "black") +
  ylab("Proportion of abstracts") +
  xlab("Year") +
  theme_bw() +
  theme(axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.title = element_text(size = 16))
positive_words_dummy

# Save to PDF. `filename` is spelled out in full rather than relying on
# partial matching of `file`.
ggsave(filename = "figure3_eachword.pdf", plot = positive_words_dummy,
       dpi = 600, width = 10, height = 7)

################
### Figure 2 Google NGram Data

## Prepare data
# Rename `share` to `sharewords` and coerce it to numeric so the NGram series
# uses the same y variable name as the journal series.
ngram_data <- ngram_data %>%
  rename(sharewords = share) %>%
  mutate(sharewords = as.numeric(sharewords))

# Journal series: only the positive-word rows of the long word-share table.
positive <- wordcount_long %>%
  filter(type == "sharepositive")

## Plot
# Black line: smoothed NGram series. Grey line: smoothed journal series,
# which inherits the x/y mapping from ggplot() and adds a linetype mapping.
# The lines are labelled with text annotations, so the legend is hidden.
ngram_year_breaks <- c(1980, 1990, 2000, 2010)
ngram_share_breaks <- c(0.0000, 0.0005, 0.0010, 0.0015, 0.0020)

ngram_plot <- ggplot(ngram_data, aes(x = year, y = sharewords)) +
  geom_smooth(color = "black", span = 0.5, se = FALSE) +
  geom_smooth(
    data = positive,
    aes(linetype = type),
    color = "grey",
    span = 0.5,
    se = FALSE
  ) +
  annotate("text", x = 1997, y = 0.0013,
           label = "Positive words (journals)", color = "grey") +
  annotate("text", x = 2000, y = 0.0003,
           label = "Positive words (Google Books)") +
  scale_x_continuous(
    breaks = ngram_year_breaks,
    labels = ngram_year_breaks,
    limits = c(1982, 2014)
  ) +
  scale_y_continuous(
    breaks = ngram_share_breaks,
    labels = c("0.0000", "0.0005", "0.0010", "0.0015", "0.0020"),
    limits = c(0.00005, 0.00207)
  ) +
  theme_bw() +
  ylab("Proportion of words") +
  xlab("Year") +
  theme(legend.position = "none")
ngram_plot

ggsave(filename = "figure2_ngram.pdf", plot = ngram_plot,
       dpi = 600, width = 10, height = 7)

#####################################
### Robustness based only on APSR and AJPS

###### Figure A.1 left panel
# Melt for ggplot: one row per (year, type), where `type` is either
# "rel_pos_abs" or "rel_neg_abs" and `type_value` holds that column's value.

all_abstract_aa_long <- melt(
  rob[, c("year", "rel_pos_abs", "rel_neg_abs")],
  id.vars = "year",
  variable.name = "type",
  value.name = "type_value"
)

# One smoothed black line per type, distinguished by linetype only; the
# linetype mapping is declared once in ggplot() and inherited by the layer.
# The lines are labelled with text annotations, so the legend is hidden.
rel.both_smooth_aa <- ggplot(
  all_abstract_aa_long,
  aes(x = year, y = type_value, group = type, linetype = type)
) +
  geom_smooth(color = "black", span = 0.5, se = FALSE) +
  annotate("text", x = 1995, y = 0.15, label = "Positive words") +
  annotate("text", x = 1995, y = 0.06, label = "Negative words") +
  scale_x_continuous(breaks = c(1980, 1990, 2000, 2010),
                     labels = c(1980, 1990, 2000, 2010)) +
  theme_bw() +
  ylab("Proportion of abstracts") +
  xlab("Year") +
  theme(legend.position = "none")
rel.both_smooth_aa

# Save to PDF. `filename` is spelled out in full rather than relying on
# partial matching of `file`.
ggsave(filename = "appendix_figure1left_relative.pdf",
       plot = rel.both_smooth_aa, dpi = 600,
       width = 10, height = 7)

### Figure A.1 right panel

# Reshape: one row per (year, type), where `type` is either "sharepositive"
# or "sharenegative" and `sharewords` holds that column's value.
names(rob)
wordcount_long_aa <- melt(
  rob[, c("year", "sharepositive", "sharenegative")],
  id.vars = "year",
  variable.name = "type",
  value.name = "sharewords"
)

# One smoothed black line per type, distinguished by linetype only; the
# linetype mapping is declared once in ggplot() and inherited by the layer.
# The y-axis uses fixed breaks, labels and limits.
sharewords_smooth_aa <- ggplot(
  wordcount_long_aa,
  aes(x = year, y = sharewords, group = type, linetype = type)
) +
  geom_smooth(color = "black", span = 0.5, se = FALSE) +
  annotate("text", x = 1995, y = 0.0012, label = "Positive words") +
  annotate("text", x = 1995, y = 0.0004, label = "Negative words") +
  scale_x_continuous(breaks = c(1980, 1990, 2000, 2010),
                     labels = c(1980, 1990, 2000, 2010)) +
  scale_y_continuous(
    breaks = c(0.0000, 0.0005, 0.0010, 0.0015, 0.0020),
    labels = c("0.0000", "0.0005", "0.0010", "0.0015", "0.0020"),
    limits = c(0.00005, 0.00207)
  ) +
  theme_bw() +
  ylab("Proportion of words") +
  xlab("Year") +
  theme(legend.position = "none")
sharewords_smooth_aa

# Save to PDF. `filename` is spelled out in full rather than relying on
# partial matching of `file`.
ggsave(filename = "appendix_figure1right_share.pdf",
       plot = sharewords_smooth_aa, dpi = 600,
       width = 10, height = 7)




